* Switch to the project folder so that the relative data paths used further down resolve.
* -capture- swallows the error when this machine-specific folder does not exist;
* in that case the script continues in whatever the current working directory is.
capture cd  "D:\Dropbox\book_welfare\replication\"


***************************************************
* extract data from the Goodreads files, in chunks

* Read the Goodreads books file in 26 passes (j = 0..25) of `chunk' raw rows each,
* pull six fields out of the comma-split cells, and save one partial .dta per pass.
local chunk = 100000

forvalues j = 0/25 {

	* First and last raw row of this pass (both inclusive).
	local first = `j' * `chunk' + 1
	local last  = (`j' + 1) * `chunk'

	* The file is read as comma-delimited text with quote binding switched off,
	* so every imported column holds one comma-separated fragment of the row.
	import delimited data\goodreads_books.json, delimiter(comma) rowrange(`first':`last') clear bindquote(nobind)

	gen pubyr     = ""
	gen author_id = ""
	gen book_id   = ""
	gen title     = ""
	gen isbn      = ""
	gen publisher = ""

	* Scan every imported column and copy the fragment that mentions each key.
	* -capture- is needed because index() fails on numeric columns.
	* NOTE(review): a later matching column overwrites an earlier one, so when a
	* row has several author_id fragments the last one is kept - confirm intended.
	foreach k of varlist v* {
		capture replace pubyr     = `k' if index(`k', "publication_year") > 0
		capture replace author_id = `k' if index(`k', "author_id") > 0
		capture replace book_id   = `k' if index(`k', "book_id") > 0
		capture replace title     = `k' if index(`k', "title") > 0 & index(`k', "title_without_series") == 0
		capture replace isbn      = `k' if index(`k', "isbn") > 0 & index(`k', "isbn13") == 0
		capture replace publisher = `k' if index(`k', "publisher") > 0
		drop `k'
	}

	* The author fragment carries a prefix that itself contains a colon-space,
	* so first keep only what follows the first opening brace (blank if none).
	replace author_id = cond(strpos(author_id, "{") > 0, substr(author_id, strpos(author_id, "{") + 1, .), "")

	* Each fragment is now of the form key-colon-space-value. Keep everything after
	* the FIRST colon-space and strip the double quotes. Unlike -split-, this does
	* not truncate values that contain a colon-space themselves, leaves no extra
	* stub variables behind, and cannot fail when a field is blank in a whole chunk.
	foreach x in pubyr book_id title isbn publisher author_id {
		replace `x' = cond(strpos(`x', ": ") > 0, substr(`x', strpos(`x', ": ") + 2, .), "")
		replace `x' = subinstr(`x', char(34), "", .)
	}

	* Same column layout in every partial file.
	order pubyr book_id title isbn publisher author_id

	save data\partial`j'.dta, replace
}

